#---- PCA -----
data_mx=metReport$data_mx
sampleAttr=metReport$sampleAttr
temp=prcomp(t(data_mx[getFill(data_mx)==1,]))
feature=metReport$input$sampleFeature
if(feature %in% colnames(sampleAttr)){
p.pca.3 = plot_ly(x=temp$x[,1], y=temp$x[,2], z=temp$x[,3], type="scatter3d", mode="markers",
color = sampleAttr[,feature],text=sampleAttr$sIDs) %>%
layout(title = "PCA runBatch+trimester",scene=list(camera=list(eye=list(x = 1, y =-1, z = 1.5))))
}else{
p.pca.3=plot_ly(x=1:10,y=1:10) %>% layout(title = "Not Applicable")
}
# invisible(mkdirs("QuanlityAssessment"))
# orca(p.pca.3,sprintf("/QuanlityAssessment/PCA.%s.%s.%s.pdf",metReport$input$level,feature,metReport$input$batchEffectCorrectMethod))
feature="consss_batch"
p.pca.4=plot_ly(x=temp$x[,1], y=temp$x[,2], z=temp$x[,3], type="scatter3d", mode="markers",
color = sampleAttr[,feature],text=sampleAttr$sIDs) %>%
layout(title = "PCA categorizedOutlier",scene=list(camera=list(eye=list(x = 1, y =-1, z = 1.5))))
# orca(p.pca.4,sprintf("/QuanlityAssessment/PCA.%s.%s.%s.pdf",metReport$input$level,feature,metReport$input$batchEffectCorrectMethod))
p.pca.3
p.pca.4
rm(list=setdiff(ls(),c(lsf.str(),"metReport","compAnno")))
#---- UMAP ----
require(cowplot)
require(svglite)
data_mx=metReport$data_mx
sampleAttr=metReport$sampleAttr
data_mx=imputeMissingValues(data_mx,data_mx)
n_neighbors=round(0.25*as.numeric(ncol(data_mx)))
min_dist=0.1
# plotfunction
plot.umap=function(data_mx,sampleAttr,feature){
config=umap.defaults
config$n_neighbors=n_neighbors
config$min_dist=min_dist
UMAP=umap::umap(t(as.matrix(data_mx)),config = config)
tmp=unlist(lapply(as.list(sampleAttr), class))
if (feature %in% names(tmp[tmp=="character"])){
temp3=umap(data_mx,n_neighbors,min_dist,labels = as.factor(sampleAttr[,feature]),
seed = 3,
legendtextsize = 9,dotsize = 2,axistextsize = 10,text = sampleAttr$sIDs, textlabelsize = 0)
}else if (feature %in% names(tmp[tmp=="numeric"])|feature =="mean_abs_zscore"|feature =="sd"){
if(feature=="mean_abs_zscore"){
sampleAttr[,"mean_abs_zscore"]=apply(data_mx,c(2), function(x) mean(abs(x),na.rm=T))
}else if(feature=="sd"){
sampleAttr[,"sd"]=apply(data_mx,c(2), function(x) sd(x,na.rm=T))
}
scores <- data.frame(UMAP$layout)
# feature=
# feature=replace(feature,feature==0,20)
scores[,feature]=sampleAttr[[feature]]
axistextsize=10
dotsize=2
legendtextsize=9
textlablesize=1
temp3 <- ggplot(data = scores, aes(x = X1, y = X2, label=sampleAttr$sIDs)) +
geom_point(aes_string(colour = feature),
size = dotsize) + theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.y = element_text(size = axistextsize,
colour = "black"),
axis.text.x = element_text(size = axistextsize,
colour = "black"),
axis.title.x = element_text(size = axistextsize),
axis.title.y = element_text(size = axistextsize),
legend.title = element_text(size = legendtextsize),
legend.text = element_text(size = legendtextsize)) +
# labs(colour = feature) +
scale_colour_gradientn(colors = greenred(10)) # +
# geom_text(vjust = "inward", hjust = "inward", size = textlablesize)
}else if (feature %in% c("salicylate","2-hydroxyhippurate (salicylurate)","heme","arginine","glucose")){
# config=umap.defaults
# config$n_neighbors=n_neighbors
# config$min_dist=min_dist
# UMAP=umap::umap(t(as.matrix(data_mx)),config = config)
scores <- data.frame(UMAP$layout)
feature = compAnno$COMP_ID[compAnno$BIOCHEMICAL==feature]
feature = feature[!is.na(feature)]
feature=data_mx[feature,]
scores$feature=feature
axistextsize=10
dotsize=2
legendtextsize=9
textlablesize=1
temp3 <- ggplot(data = scores, aes(x = X1, y = X2, label=sampleAttr$sIDs)) + geom_point(aes(colour = feature),
size = dotsize) + theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.text.y = element_text(size = axistextsize,
colour = "black"),
axis.text.x = element_text(size = axistextsize,
colour = "black"), axis.title.x = element_text(size = axistextsize),
axis.title.y = element_text(size = axistextsize),
legend.title = element_text(size = legendtextsize),
legend.text = element_text(size = legendtextsize)) # +
labs(colour = feature) + scale_colour_gradient(low = "blue",
high = "red")
}
return(temp3)
}
# by run batch * trimester
temp=umap(data_mx,n_neighbors,min_dist,labels = as.factor(sampleAttr$ASA_tri_chr),
seed = 3,
legendtitle = "",legendtextsize = 9,dotsize = 2,axistextsize = 10,text = sampleAttr$sIDs,textlabelsize = 0)
# by outlier group
feature="consss_batch"
p2=plot.umap(data_mx = data_mx,sampleAttr = sampleAttr,feature = feature)+
theme(legend.position = "bottom",legend.direction = "vertical",legend.title = element_blank())
# by mean absolute zscore
feature="mean_abs_zscore"
p3=plot.umap(data_mx = data_mx,sampleAttr = sampleAttr,feature = feature)+theme(legend.position = "bottom")+ labs(color='mean\nabsolute\nzscore')
# by zscore sd
feature="sd"
p4=plot.umap(data_mx = data_mx,sampleAttr = sampleAttr,feature = feature)+theme(legend.position = "bottom")
ply1=ggplotly(temp) %>% layout(legend = list(orientation = "h", x = 0., y = -0.05))
ply1$x$data[[5]]$text=""
ply2=ggplotly(p2) %>% layout(legend = list(orientation = "h", x = 0., y = -0.05))
ply2$x$data[[length(unique(sampleAttr$consss_batch))+1]]$text=""
ply3=ggplotly(p3) %>% layout(legend = list(orientation = "h", x = 0., y = -0.05))
ply4=ggplotly(p4) %>% layout(legend = list(orientation = "h", x = 0., y = -0.05))
subplot(ply1,ply3,
which_layout = 1,
nrows = 1)
# ggarrange(legend1,legend3,heights = 1)
subplot(ply2,ply4,
which_layout = 1,
nrows = 1)
# ggarrange(legend2,legend4,heights = c(0.1))
legend1=cowplot::get_legend(temp)
temp=temp + theme(legend.position = "none")
legend2=cowplot::get_legend(p2)
p2=p2+theme(legend.position = "none")
legend3=cowplot::get_legend(p3)
p3=p3+theme(legend.position = "none")
legend4=cowplot::get_legend(p4)
p4=p4+theme(legend.position = "none")
p=ggarrange(temp,p2,legend1,legend2,p3,p4,legend3,legend4,ncol=2, nrow=4, common.legend = F)
# p
#title(main = "Distribution of summary statistics per sample")
# ggsave(p,filename = sprintf("QuanlityAssessment/UMAP.%s.%s.%s.pdf",
# metReport$input$level,
# dim(sampleAttr)[1],
# metReport$input$batchEffectCorrectMethod),
# height = 16,width = 8,device = cairo_pdf)
# ggsave(p,filename = sprintf("QuanlityAssessment/UMAP.%s.%s.%s.svg",
# metReport$input$level,
# dim(sampleAttr)[1],
# metReport$input$batchEffectCorrectMethod),
# height = 16,width = 8,device = svglite)
To identify the metabolites perturbed in response to aspirin treatment, we compared metabolomics profiles in aspirin-2nd samples with placebo-2nd samples. Univariate analysis (UVA) using t-test and False Discovery Rate (FDR) correction for multiple testing yielded 28 significantly perturbed metabolites - Table S2
rm(list = ls())
require(devtools)
require(repmis)
require(xlsx)
invisible(source_data("https://github.com/BRL-BCM/ASPRE-analysis/raw/main/data/dataSet.RData?raw=True"))
source_url("https://github.com/BRL-BCM/ASPRE-analysis/raw/main/R/func.R")
#---- get full data_mx with outliers excluded ----
outliers=c(sampleAttr$sIDs[sampleAttr$consss_batch!="non-outlier"])
input=list(rmX="Yes",level="zscore",fillThreshold=0.2,DiffThr=0.5,
batchEffectCorrectMethod="none",
tri2only=FALSE,sampleFeature="ASA_tri_chr",
ASA2outliersBoxplot="Include all ASA 2nd trimester samples",measureBoxplot="mean",
ImputeMissingValues=TRUE,selectMets=c("glucose","heme"),
outliers=outliers)
temp=PEASA.getDataMxByInput(input)
metReport=list(data_mx=temp$data_mx,sampleAttr=temp$sampleAttr,input=input)
rm(list=setdiff(ls(),c(lsf.str(),"metReport","compAnno")))
#---- perform t-test ----
data_mx=metReport$data_mx
sampleAttr=metReport$sampleAttr
group=list()
for ( i in sort(unique(sampleAttr$ASA_tri_chr))){
group[[i]]=sampleAttr$sIDs[sampleAttr$ASA_tri_chr==i]
}
my_comparison=list(
c("PLACEBO - 1st Trimester", "PLACEBO - 2nd Trimester"),
c("PLACEBO - 1st Trimester", "ASA - 1st Trimester"),
c("ASA - 1st Trimester","ASA - 2nd Trimester"),
c("PLACEBO - 2nd Trimester","ASA - 2nd Trimester")
)
p.result=perform_all_t_tests.pair_wise(data_mx,group = group, p.adjust.method = "fdr", paired=FALSE, my_comparison = my_comparison)
yTh=-log10(0.05)
temp = data.frame(matrix(ncol = 7))
colnames(temp)=c("x","y","direction","pair","group1","group2","BIOCHEMICAL")
for (n in 1:length(p.result$comparison_pairs)){
df.vol = data.frame(cbind(x=p.result$meanDiff[,n],y=-log10(p.result$p.adj2[,n])))
df.vol$direction = rep ("Not Significant",nrow(df.vol))
for ( r in 1:nrow(df.vol)){
if (is.na(df.vol$y[r])){
df.vol$direction[r] = NA
}else{
if (df.vol$y[r] > yTh){
if (df.vol$x[r] <=0){
df.vol$direction[r] = "Down"
}else{
df.vol$direction[r] = "Up"
}
}
}
}
df.vol$pair=paste0(p.result$comparison_pairs[[n]],collapse = "&")
df.vol$group1=sprintf("base:%s",gsub("_"," ",p.result$comparison_pairs[[n]][1]))
df.vol$group2=sprintf("exprimental:%s",gsub("_"," ",p.result$comparison_pairs[[n]][2]))
df.vol$BIOCHEMICAL=compAnno$BIOCHEMICAL[match(rownames(data_mx),compAnno$COMP_ID)]
temp=rbind(temp,df.vol)
}
df.vol=temp[-1,]
metReport[["volcano.ASA_tri.df"]]=df.vol
rm(list=setdiff(ls(),c(lsf.str(),"metReport","compAnno")))
#--- Table S2 ----
require(DT)
require(openxlsx)
temp=metReport$volcano.ASA_tri.df[metReport$volcano.ASA_tri.df$pair=="PLACEBO_-_2nd_Trimester&ASA_-_2nd_Trimester",]
if(length(temp$direction!="Not Significant")!=0){
temp=temp[temp$direction!="Not Significant",]
}
temp=temp[,c("x","y","direction","BIOCHEMICAL")]
if(dim(temp)[1]!=0){
temp$`2nd Trimester Comparison`=TRUE
}else{
temp=NULL
}
tmp=unique(c(temp$BIOCHEMICAL))
MOF.df=matrix(ncol = 4)
MOF.df=data.frame(MOF.df)
colnames(MOF.df)=c("BIOCHEMICAL","Direction","delta Z","log10(p)")
for (i in 1:length(tmp)){
MOF.df[i,"BIOCHEMICAL"]=tmp[i]
MOF.df[i,"Direction"]=ifelse(tmp[i] %in% temp$BIOCHEMICAL,temp$direction[temp$BIOCHEMICAL==tmp[i]],"")
MOF.df[i,"delta Z"] = ifelse(tmp[i] %in% temp$BIOCHEMICAL,temp$x[temp$BIOCHEMICAL==tmp[i]],"")
MOF.df[i,"log10(p)"] = ifelse(tmp[i] %in% temp$BIOCHEMICAL,temp$y[temp$BIOCHEMICAL==tmp[i]],"")
}
Sub_Pathway=compAnno$SUB_PATHWAY[match(MOF.df$BIOCHEMICAL,compAnno$BIOCHEMICAL)]
HMDB_ID=compAnno$HMDB_ID[match(MOF.df$BIOCHEMICAL,compAnno$BIOCHEMICAL)]
MOF.df=cbind(cbind(Sub_Pathway,MOF.df),HMDB_ID=HMDB_ID)
ASA.mets.df=MOF.df
metReport[["ASA.mets.df"]]= ASA.mets.df
#draw table
if (nrow(MOF.df)!=0){
df = datatable(MOF.df[order(MOF.df$Sub_Pathway),],
rownames = FALSE,
caption = 'Table S2: Differential metabolites between aspirin-treated samples (n=20) and placebo-treated samples (n=54). *Direction of change and calculation of ∆ z-score is based on comparing aspirin-treated samples against placebo-treated samples. Metabolites are grouped by sub-pathways.',
options = list(scrollX = TRUE,fixedColumns = TRUE, #dom = 't',
pageLength = 10,
lengthMenu = c(10, 20, 50))) %>%
formatStyle(columns = colnames(.), fontSize = '50%') %>%
formatRound(columns=c('delta Z', 'log10(p)'), digits=3)
}else{
df = datatable(data.frame(messgae=c("Metaboloites with different missing rate between ASA and PLACEBO group not found.")),options = list(dom = 't'))
# print("Metaboloites with different missing rate between ASA and PLACEBO group not found.")
}
df
dir.output="aspirin.UVA"
invisible(mkdirs(dir.output))
# df.vol=metReport$volcano.ASA_tri.df
# save(df.vol,file=sprintf("%s/FourGroup.ttest.RData",dir.output))
# write.xlsx(MOF.df,file=sprintf("%s/Table.UVA.xlsx",dir.output))
We next performed multivariate analysis (MVA) utilizing Orthogonal Projections to Latent Structures Discriminant Analysis (OPLS-DA) model and reported compliance as continuous measure of aspirin administration. Modeling was done twice to refine the list of aspirin-induced metabolomics alterations, where the second-pass OPLS-DA modeled on 185 metabolites with Variable Importance on Projection (VIP) value >1 in the first-pass.
rm(list=ls())
dir.output="aspirin.MVA"
# invisible(mkdirs(dir.output))
require(ropls)
require(R.utils)
require(reshape2)
invisible(source_data("https://github.com/BRL-BCM/ASPRE-analysis/raw/main/data/dataSet.RData?raw=True"))
source_url("https://github.com/BRL-BCM/ASPRE-analysis/raw/main/R/func.R")
#---- get full data_mx with outliers excluded ----
outliers=c(sampleAttr$sIDs[sampleAttr$consss_batch!="non-outlier"])
input=list(rmX="Yes",level="zscore",fillThreshold=0.2,DiffThr=0.5,
batchEffectCorrectMethod="none",
tri2only=FALSE,sampleFeature="ASA_tri_chr",
ASA2outliersBoxplot="Include all ASA 2nd trimester samples",measureBoxplot="mean",
ImputeMissingValues=TRUE,selectMets=c("glucose","heme"),
outliers=outliers)
temp=PEASA.getDataMxByInput(input)
metReport=list(data_mx=temp$data_mx,sampleAttr=temp$sampleAttr,input=input)
rm(list=setdiff(ls(),c(lsf.str(),"metReport","compAnno")))
#---- First-pass make model for train-only samples to get permutation plot ----
dir.output="aspirin.MVA"
TrainSet=c("all","tri2")[2]
data_mx=metReport$data_mx
sampleAttr=metReport$sampleAttr
data_mx=t(data_mx)
if(TrainSet=="tri2"){
train.ind=which(sampleAttr$trimester=="2")
data_mx=data_mx[train.ind,]
sampleAttr=sampleAttr[train.ind,]
}
# set response as aspirin compliance
target=sampleAttr$Compliance....
names(target)=sampleAttr$sIDs
# make model, list feature variable importance
# oplsda=opls(data_mx, target,predI = 1, orthoI = NA, plotL=FALSE,permI=50)
load(sprintf("%s/ASA.oplsda.compliance.train-only.%s.RData",dir.output,TrainSet))
VIP=oplsda@vipVn
VIP=VIP[order(VIP,decreasing = T)]
VIP.df=data.frame(Subpathway=compAnno$SUB_PATHWAY[match(names(VIP),compAnno$COMP_ID)],
BIOCHEMICAL=compAnno$BIOCHEMICAL[match(names(VIP),compAnno$COMP_ID)],
Direction=ifelse(oplsda@loadingMN[match(names(VIP),rownames(oplsda@loadingMN)),]>0,"Up","Down"),
VIP=VIP,
Xloadings=oplsda@loadingMN[match(names(VIP),rownames(oplsda@loadingMN)),]
)
# save(oplsda,VIP,VIP.df,file=sprintf("%s/ASA.oplsda.compliance.train-only.%s.RData",dir.output,TrainSet))
rm(list=setdiff(ls(),c(lsf.str(),"metReport","compAnno")))
#---- Second-pass ----
dir.output="aspirin.MVA"
TrainSet=c("all","tri2")[2]
data_mx=metReport$data_mx
sampleAttr=metReport$sampleAttr
data_mx=t(data_mx)
if(TrainSet=="tri2"){
train.ind=which(sampleAttr$trimester=="2")
data_mx=data_mx[train.ind,]
sampleAttr=sampleAttr[train.ind,]
}
VIP0=loadToEnv(sprintf("%s/ASA.oplsda.compliance.train-only.%s.RData",dir.output,TrainSet))[["VIP"]]
VIP0=VIP0[VIP0>1]
data_mx=data_mx[,colnames(data_mx) %in% names(VIP0)]
# set target as compliance and make model
target=sampleAttr$Compliance....
names(target)=sampleAttr$sIDs
# oplsda=opls(data_mx, target,predI = 1, orthoI = NA, plotL=FALSE,permI=50)
load(sprintf("%s/ASA2.oplsda.compliance.train-only.%s.RData",dir.output,TrainSet))
VIP=oplsda@vipVn
VIP=VIP[order(VIP,decreasing = T)]
VIP.df=data.frame(COMP_ID=compAnno$COMP_ID[match(names(VIP),compAnno$COMP_ID)],
Subpathway=compAnno$SUB_PATHWAY[match(names(VIP),compAnno$COMP_ID)],
BIOCHEMICAL=compAnno$BIOCHEMICAL[match(names(VIP),compAnno$COMP_ID)],
Direction=ifelse(oplsda@loadingMN[match(names(VIP),rownames(oplsda@loadingMN)),]>0,"Up","Down"),
VIP=VIP,
Xloadings=oplsda@loadingMN[match(names(VIP),rownames(oplsda@loadingMN)),]
)
# save(oplsda,VIP,VIP.df,file=sprintf("%s/ASA2.oplsda.compliance.train-only.%s.RData",dir.output,TrainSet))
rm(list=setdiff(ls(),c(lsf.str(),"metReport","compAnno","TrainSet","dir.output")))
#---- OPLS-DA score plot ----
load(sprintf("%s/ASA2.oplsda.compliance.train-only.%s.RData",dir.output,TrainSet))
p=ropls.plot(oplsda,plottype = "score", xvar = "p1", yvar = "o1", hotelling=T)+theme_bw() +
labs(title = "OPLS Score plot of Training Set")
require(svglite)
# ggsave(p,filename = sprintf("%s/ASA2.scoreplot.svg",dir.output),height = 4,width = 4,device = svglite)
# ggsave(p,filename = sprintf("%s/ASA2.scoreplot.pdf",dir.output),height = 4,width = 4,device = cairo_pdf)
p1=p
#---- plot model R2Y Q2Y metrics ----
df=oplsda@summaryDF[,c("R2Y(cum)","Q2(cum)","pre","ort","pR2Y","pQ2")]
colnames(df)=c("R2Y","Q2","pre","ort","pR2Y","pQ2")
knitr::kable(oplsda@summaryDF)
| R2X(cum) | R2Y(cum) | Q2(cum) | RMSEE | pre | ort | pR2Y | pQ2 | |
|---|---|---|---|---|---|---|---|---|
| Total | 0.277 | 0.954 | 0.871 | 8.81 | 1 | 2 | 0.02 | 0.02 |
df=data.frame(oplsda@suppLs$permMN[,c("R2Y(cum)","Q2(cum)","sim")])
colnames(df)=c("R2Y(cum)","Q2(cum)","sim")
df[,"n"]=rownames(df)
df=melt(df,id.vars = c("n","sim"),variable.name = "performance")
p=ggplot(df,aes_string(x="sim",y="value",color ="performance")) + geom_point() + theme_bw() +
xlab("Similarity(response,perm-response)")
p=p+theme_bw()+theme(legend.position = c(0.75,0.3))+xlab("Reported compliance")+ylab("Predicted compliance") +labs(title = "Validation plot of the OPLS-DA model obtained from 50 permutation tests")
# ggsave(p,filename = sprintf("%s/ASA2.modelmetrics.svg",dir.output),height = 4,width = 4,device = svglite)
# ggsave(p,filename = sprintf("%s/ASA2.modelmetrics.pdf",dir.output),height = 4,width = 4,device = cairo_pdf)
p2=p
p1
p2
rm(list=setdiff(ls(),c(lsf.str(),"metReport","compAnno","TrainSet","dir.output")))
#---plot pheatmap----
dir.output="aspirin.MVA"
TrainSet=c("all","tri2")[2]
data_mx=metReport$data_mx
sampleAttr=metReport$sampleAttr
data_mx=t(data_mx)
if(TrainSet=="tri2"){
train.ind=which(sampleAttr$trimester=="2")
data_mx=data_mx[train.ind,]
sampleAttr=sampleAttr[train.ind,]
}
data_mx=t(scale(data_mx,center =T, scale = T))
# data_mx=t(scale(data_mx,center =T, scale = T))
# data_mx=t(data_mx)
# data_mx=t(apply(data_mx,1,function(x) scale(x,center = median(x,na.rm = T),scale = mad(x,na.rm = T)))) #rescale
VIP0.df=loadToEnv(sprintf("%s/ASA2.oplsda.compliance.train-only.%s.RData",dir.output,TrainSet))[["VIP.df"]]
VIP0.df=VIP0.df[VIP0.df$VIP>1,]
df=data_mx[VIP0.df$COMP_ID[order(VIP0.df$Subpathway)],]
# df=df[rownames(df)!="22185",]
df=t(df)
df=t(imputeMissingValues(df,df))
# df=t(scale(t(df),center = T,scale = T)) #rescale
# df=df[,sampleAttr$sIDs[order(sampleAttr$ASA_tri_chr)]]
rownames(df)=paste0(compAnno$SUB_PATHWAY[match(rownames(df),compAnno$COMP_ID)]," - ",compAnno$BIOCHEMICAL[match(rownames(df),compAnno$COMP_ID)])
require(pheatmap)
require(viridis)
plot.pheatmap.colannotation=function(df,sampleAttr,clusterSample=TRUE,showRownames=FALSE,showColnames=FALSE){
brks = quantile(df, probs = seq(.01, .99, .02), na.rm = TRUE)
my_sample_col=data.frame(ASA_tri_chr=sampleAttr[order(sampleAttr$ASA_tri_chr),c("ASA_tri_chr")])
rownames(my_sample_col)=sampleAttr$sIDs
colnames(my_sample_col)=c("Group")
my_colour = list(
Group = c("#66C2A5","#FC8D62")
)
names(my_colour[["Group"]])=unique(my_sample_col[,"Group"])
if(clusterSample){
x=pheatmap(df,
border_color= NA,
show_rownames= showRownames,
show_colnames = showColnames,
cluster_rows =T,
drop_levels = T,
breaks = brks,
color = inferno(length(brks) - 1),
# color = colorpanel(length(brks) - 1, "green", "black", "red"),
clustering_distance_cols = "euclidean",
clustering_distance_rows = "euclidean",
clustering_method = "mcquitty", #"mcquitty" "average"
annotation_col = my_sample_col,
# annotation_row = my_row_col,
annotation_colors = my_colour,
fontsize = 8)
}else{
x=pheatmap(df,
border_color= NA,
show_rownames= showRownames,
show_colnames = showColnames,
cluster_rows =T,
drop_levels = T,
breaks = brks,
# color = inferno(length(brks) - 1),
color = colorpanel(length(brks) - 1, "green", "black", "red"),
cluster_cols = FALSE,
# clustering_distance_cols = "euclidean", clustering_method = "mcquitty",
# clustering_distance_rows = "correlation", clustering_method = "average",
gaps_col = cumsum(sapply(sort(unique(gsub(" \\(.*$","",sampleAttr$ASA_tri_chr))), function(x) length(grep(x,sampleAttr$ASA_tri_chr)))),
annotation_col = my_sample_col,
# annotation_row = my_row_col,
annotation_colors = my_colour,
# drop_levels = TRUE,
fontsize = 8)
}
return(x)
}
# x=plot.pheatmap.colannotation(df,sampleAttr = sampleAttr,clusterSample=TRUE,showRownames=TRUE)
# dim(data_mx)
The current study included variability in reported compliance and variability in internal exposure to aspirin ascertained directly from the metabolomics data. We next asked if the joined and individual correlation of these factors with the outcome as preeclampsia would be consistent with protective effects of aspirin.
rm(list=ls())
require(ropls)
require(R.utils)
require(repmis)
require(devtools)
invisible(source_data("https://github.com/BRL-BCM/ASPRE-analysis/raw/main/data/dataSet.RData?raw=True"))
source_url("https://github.com/BRL-BCM/ASPRE-analysis/raw/main/R/func.R")
#---- get full data_mx with outliers excluded ----
outliers=c(sampleAttr$sIDs[sampleAttr$consss_batch!="non-outlier"])
input=list(rmX="Yes",level="zscore",fillThreshold=0.2,DiffThr=0.5,
batchEffectCorrectMethod="none",
tri2only=FALSE,sampleFeature="ASA_tri_chr",
ASA2outliersBoxplot="Include all ASA 2nd trimester samples",measureBoxplot="mean",
ImputeMissingValues=TRUE,selectMets=c("glucose","heme"),
outliers=outliers)
temp=PEASA.getDataMxByInput(input)
metReport=list(data_mx=temp$data_mx,sampleAttr=temp$sampleAttr,input=input)
rm(list=setdiff(ls(),c(lsf.str(),"metReport","compAnno")))
# #-----make 20-fold CV model----
dir.output="aspirin.MVA"
# invisible(mkdirs(dir.output))
TrainSet=c("all","tri2")[2]
data_mx=metReport$data_mx
sampleAttr=metReport$sampleAttr
data_mx=t(data_mx)
if(TrainSet=="tri2"){
train.ind=which(sampleAttr$trimester=="2")
data_mx=data_mx[train.ind,]
sampleAttr=sampleAttr[train.ind,]
}
data_mx=t(data_mx)
# require(caret)
# folds <- createFolds(1:dim(sampleAttr)[1], k =20, list = TRUE, returnTrain = FALSE)
# oplsda.cv=list()
# for (i in names(folds)){
# oplsda.cv[[i]]=opls(data_mx, target,predI = 1, orthoI = NA, plotL=FALSE, subset=(1:nrow(data_mx))[-folds[[i]]])
#
# }
# save(folds,oplsda.cv,file=sprintf("%s/ASA2.oplsda.cv.compliance.%s.RData",dir.output,TrainSet))
load(sprintf("%s/ASA2.oplsda.cv.compliance.%s.RData",dir.output,TrainSet))
Ypred.cv=rep(NA,dim(sampleAttr)[1])
names(Ypred.cv)=sampleAttr$sIDs
for (i in names(folds)){
Ypred.cv[rownames(oplsda.cv[[i]]@suppLs$yTesMN)]=as.vector(oplsda.cv[[i]]@suppLs$yTesMN)
}
Yactual=sampleAttr$Compliance....
names(Yactual)=sampleAttr$sIDs
require(caret)
rmse=RMSE(Ypred.cv,Yactual)
knitr::kable(data.frame(`Cross Validation 20-fold `=sprintf("RMSE: %.3f",rmse)))
| Cross.Validation.20.fold. |
|---|
| RMSE: 14.217 |
df=data.frame(sampleID=names(Ypred.cv),Actual=Yactual,Predicted=Ypred.cv,Subset="test",group=sampleAttr$ASA)
# df$group=paste0(sampleAttr$ASA_tri_chr,sep=" PE",as.factor(sampleAttr$pe))
df$group=paste0(ifelse(sampleAttr$ASA,"aspirin-treated","placebo-treated"),sep=", ",ifelse(sampleAttr$pe,"preeclamptic","non-preeclamptic"))
df$group=as.character(df$group)
df$group=gsub("ASA","aspirin",df$group)
df$group=gsub("PLACEBO","placebo",df$group)
save(df,file=sprintf("%s/InternalExposure-20CV.RData",dir.output))
# draw plot
p=ggplot(df, aes_string(x = "Predicted", y = "Actual",col="group",label="sampleID")) +
geom_point()
p=p+theme_bw()+theme(legend.position = c(0.35,0.5),
legend.title = element_blank()
# legend.text=element_text(size=8)
)+
xlab("Predicted Compliance")+ylab("Reported Compliance")
p1=p
# ggsave(p,filename = sprintf("%s/Aspirin.PredictedVsActual.PEoverlay.pdf",dir.output),height = 4,width = 4,device = cairo_pdf)
# ggsave(p,filename = sprintf("%s/Aspirin.PredictedVsActual.PEoverlay.svg",dir.output),height = 4,width = 4,device = svglite)
#---- predicted compliance independently predicts preeclampsia development ----
require(ggpubr)
require(reshape2)
temp=df[grep("aspirin",df$group),c("sampleID","Actual","Predicted","group")]
colnames(temp)=c("sampleID","Reported compliance","Predicted compliance","group")
temp=melt(temp,id.vars = c("sampleID","group"),variable.name = "type")
temp$group=factor(temp$group,levels = c("aspirin-treated, preeclamptic","aspirin-treated, non-preeclamptic"))
p = ggboxplot(temp, x = "group", y = "value",color = "group",
facet.by="type",palette = "npg")+
# stat_pvalue_manual(stat.test, label ="{p.adj}{p.adj.signif}", tip.length = 0.01) +
stat_compare_means( label = "p", label.x = 1.5,method = "t.test") +
ylab("value") +theme(axis.text.x=element_blank())
p2=p
# ggsave(p,filename = sprintf("%s/Aspirin.ReportedVsPredictedCompliance.PE.pdf",dir.output),height = 4,width = 4,device = cairo_pdf)
# ggsave(p,filename = sprintf("%s/Aspirin.ReportedVsPredictedCompliance.PE.svg",dir.output),height = 4,width = 4,device = svglite)
p1+labs(title = "Reported compliance plotted against internal exposure. Internal exposure is calculated as leave-2-out OPLS-DA model predicted compliance.")
p2+labs(title = "Distribution of reported compliance and internal exposure in aspirin-treated women with (n=10) and without (n=10) preeclampsia.")
require(pROC)
Ypred.cv=ifelse(Ypred.cv<50,"0","1")
Yactual=as.numeric(sampleAttr$ASA)
ROC=roc(Yactual,as.numeric(Ypred.cv))
# AIC
temp=df[grep("aspirin",df$group),c("sampleID","Actual","Predicted","group")]
colnames(temp)=c("sampleID","Reported compliance","Predicted compliance","group")
require(stringr)
require(MuMIn)
temp$PE=as.numeric(!grepl("non-",temp$group))
# ml.reportedCompliance=lm(PE~`Reported compliance`,temp)
glm.reportedCompliance=glm(PE~`Reported compliance`,family=binomial(link='logit'),temp)
AUC.reportedCompliance=auc(roc(temp$PE,glm.reportedCompliance$fitted.values))
# ml.predictedCompliance=lm(PE~`Predicted compliance`,temp)
glm.predictedCompliance=glm(PE~`Predicted compliance`,family=binomial(link='logit'),temp)
AUC.predictedCompliance=auc(roc(temp$PE,glm.predictedCompliance$fitted.values))
# ml.combinedCompliance=lm(PE~`Reported compliance`+`Predicted compliance`,temp)
glm.combinedCompliance=glm(PE~`Reported compliance`+`Predicted compliance`,family=binomial(link='logit'),temp)
AUC.combinedCompliance=auc(roc(temp$PE,glm.combinedCompliance$fitted.values))
AIC.df=data.frame(Model=c("Reported Compliance","Predicted compliance","Reported Compliance + Predicted compliance"),
K=c(1,1,2),
AUC=c(AUC.reportedCompliance,AUC.predictedCompliance,AUC.combinedCompliance),
AICc=c(AICc(glm.reportedCompliance),AICc(glm.predictedCompliance),AICc(glm.combinedCompliance))
)
require(openxlsx)
knitr::kable(AIC.df, caption = "Fig 3. Akaike information criteria (AIC) for the fitted logistic regression models. K, number of parameters; AUC, area under the ROC Curve; AICc, Akaike information criterion with correction for small sample size.")
| Model | K | AUC | AICc |
|---|---|---|---|
| Reported Compliance | 1 | 0.69 | 30.96359 |
| Predicted compliance | 1 | 0.85 | 23.08164 |
| Reported Compliance + Predicted compliance | 2 | 0.85 | 25.13552 |
# write.xlsx(AIC.df,file = sprintf("%s/ASA2.AIC.predCompliance2PE.xlsx",dir.output),rowNames=T)
rm(list=ls())
dir.output="PREG.MVA"
# invisible(mkdirs(dir.output))
require(ropls)
require(R.utils)
require(reshape2)
invisible(source_data("https://github.com/BRL-BCM/ASPRE-analysis/raw/main/data/dataSet.RData?raw=True"))
source_url("https://github.com/BRL-BCM/ASPRE-analysis/raw/main/R/func.R")
#---- get full data_mx with outliers excluded ----
outliers=c(sampleAttr$sIDs[sampleAttr$consss_batch!="non-outlier"])
input=list(rmX="Yes",level="zscore",fillThreshold=0.2,DiffThr=0.5,
batchEffectCorrectMethod="none",
tri2only=FALSE,sampleFeature="ASA_tri_chr",
ASA2outliersBoxplot="Include all ASA 2nd trimester samples",measureBoxplot="mean",
ImputeMissingValues=TRUE,selectMets=c("glucose","heme"),
outliers=outliers)
temp=PEASA.getDataMxByInput(input)
metReport=list(data_mx=temp$data_mx,sampleAttr=temp$sampleAttr,input=input)
rm(list=setdiff(ls(),c(lsf.str(),"metReport","compAnno")))
#---- make model for train-only to get permutation plot ----
dir.output="PREG.MVA"
# invisible(mkdirs(dir.output))
data_mx=metReport$data_mx
sampleAttr=metReport$sampleAttr
data_mx=t(data_mx)
train.ind=which(!sampleAttr$ASA| sampleAttr$trimester=="1")
data_mx=data_mx[train.ind,]
sampleAttr=sampleAttr[train.ind,]
target=sampleAttr$ga.w
names(target)=sampleAttr$sIDs
# oplsda=opls(data_mx, target,predI = 1, orthoI = NA, plotL=FALSE,permI=50)
# save(oplsda,file=sprintf("%s/PREG.oplsda.ga-w.train-only.tri1_and_placebo2.RData",dir.output))
#---- model performance table: permutation ----
# load(sprintf("%s/PREG.oplsda.ga-w.train-only.tri1_and_placebo2.RData",dir.output))
# knitr::kable(oplsda@summaryDF)
#---- make model for training and testing ----
data_mx=metReport$data_mx
sampleAttr=metReport$sampleAttr
data_mx=t(data_mx)
target=sampleAttr$ga.w
names(target)=sampleAttr$sIDs
train.ind=which(!sampleAttr$ASA | sampleAttr$trimester=="1")
# oplsda=opls(data_mx, target,predI = 1, orthoI = NA, subset=train.ind, plotL=FALSE)
load(sprintf("%s/PREG.oplsda.ga-w.train.tri1_and_placebo2.RData",dir.output))
VIP=oplsda@vipVn
VIP=VIP[order(VIP,decreasing = T)]
VIP.df=data.frame(Subpathway=compAnno$SUB_PATHWAY[match(names(VIP),compAnno$COMP_ID)],
BIOCHEMICAL=compAnno$BIOCHEMICAL[match(names(VIP),compAnno$COMP_ID)],
Direction=ifelse(oplsda@loadingMN[match(names(VIP),rownames(oplsda@loadingMN)),]>0,"Up","Down"),
VIP=VIP,
Xloadings=oplsda@loadingMN[match(names(VIP),rownames(oplsda@loadingMN)),]
)
# save(oplsda,VIP,VIP.df,file=sprintf("%s/PREG.oplsda.ga-w.train.tri1_and_placebo2.RData",dir.output))
#----Model performance: Goodness of fit table----
knitr::kable(oplsda@summaryDF,caption = "Performance of GA prediction model (metabolic clock)")
| R2X(cum) | R2Y(cum) | Q2(cum) | RMSEE | RMSEP | pre | ort | |
|---|---|---|---|---|---|---|---|
| Total | 0.205 | 0.935 | 0.865 | 1.18 | 2.11 | 1 | 2 |
#----plot predicted-actual with both training and testing----
predictedY=rep(NA,length(sampleAttr$sIDs))
subset=rep(NA,length(sampleAttr$sIDs))
for (i in 1:length(predictedY)){
if (i %in% train.ind){
j=which(train.ind==i)
predictedY[i]=oplsda@suppLs$yPreMN[j]
subset[i]="train"
}else{
j=which((1:length(oplsda@suppLs$yMCN))[-train.ind]==i)
predictedY[i]=oplsda@suppLs$yTesMN[j]
subset[i]="test"
}
}
df=data.frame(sampleID=rownames(oplsda@suppLs$yMCN),Actual=as.vector(oplsda@suppLs$yMCN),Predicted=predictedY,Subset=subset,group=sampleAttr$ASA_tri_chr[match(rownames(oplsda@suppLs$yMCN),sampleAttr$sIDs)])
df$group=as.character(df$group)
df$group=replace(df$group,grep("1st",df$group),"1st trimester, pre-treatment")
df$group=replace(df$group,grep("PLACEBO - 2nd Trimester",df$group),"2nd trimester, placebo-treated")
df$group=replace(df$group,grep("ASA - 2nd Trimester",df$group),"2nd trimester, aspirin-treated")
# draw plot
p=ggplot(df, aes_string(x = "Actual", y = "Predicted",label="sampleID")) + theme_bw()+theme(legend.position = c(0.31,0.88),legend.title = element_blank(),legend.background = element_rect(fill=alpha('white', 0.0)))+
xlab("True GA")+ylab("Predicted GA") +
geom_point(aes_string(col="group")) + geom_smooth(method='lm')
p1=p
# ggsave(p,filename = sprintf("%s/PREG.actual-predict.pdf",dir.output),height = 4,width = 4,device = cairo_pdf)
# ggsave(p,filename = sprintf("%s/PREG.actual-predict.svg",dir.output),height = 4,width = 4,device = svglite)
# ggplotly(p)
# density plot
colorby="group"
p = ggplot(df, aes_string(x="Predicted",fill=colorby))+
geom_density(alpha=0.4)
p= p + xlab("Predicted GA (weeks)") + ylab("Density") +theme_bw()+theme(legend.position = c(0.7,0.9),legend.title = element_blank(),legend.background = element_rect(fill=alpha('white', 0.1)))
p2=p
# ggsave(p,filename = sprintf("%s/PREG.PredictedGAdensity.pdf",dir.output),height = 4,width = 4,device = cairo_pdf)
# ggsave(p,filename = sprintf("%s/PREG.PredictedGAdensity.svg",dir.output),height = 4,width = 4,device = svglite)
#residual plot
model<-lm(Predicted~Actual,data=df[df$group!="2nd trimester, aspirin-treated",])
require(Metrics)
knitr::kable(data.frame(MAE=mae(df$Actual[df$group!="2nd trimester, aspirin-treated"],df$Predicted[df$group!="2nd trimester, aspirin-treated"]),
`R-squre`=(summary(model))$adj.r.squared))
| MAE | R.squre |
|---|---|
| 0.9354199 | 0.9343082 |
resd=df$Predicted-stats::predict(model,newdata=df)
names(resd)=df$sampleID
df$lm.resd=resd
require(ggpubr)
knitr::kable(df %>% group_by(group) %>% summarise(mean=mean(lm.resd)))
| group | mean |
|---|---|
| 1st trimester, pre-treatment | -0.0448741 |
| 2nd trimester, aspirin-treated | -1.2724468 |
| 2nd trimester, placebo-treated | 0.0581701 |
stat.mean=compare_means(lm.resd ~ group, data = df,
ref.group = ".all.", method = "t.test",p.adjust.method = "bonferroni") %>% rstatix::add_significance("p.adj")
df$group=factor(df$group,levels = c("1st trimester, pre-treatment","2nd trimester, aspirin-treated","2nd trimester, placebo-treated"))
p = ggboxplot(df, x = "group", y = "resd",col="group",add = "jitter") + #rotate_x_text(angle = 90) +
stat_compare_means(method = "anova", label.y = 6) + # Add global annova p-value
stat_pvalue_manual(stat.mean,label = "p.adj.signif",x="group2",y.position = 5)+
ylab("residual GA (weeks)") + xlab("") + theme(axis.text.x = element_blank(),legend.title = element_blank(), legend.direction = "vertical")
p3=p
# ggsave(p,filename = sprintf("%s/PREG.ResidualGA.pdf",dir.output),height = 4,width = 4,device = cairo_pdf)
# ggsave(p,filename = sprintf("%s/PREG.ResidualGA.svg",dir.output),height = 4,width = 4,device = svglite)
# save(df,file=sprintf("%s/PREG.actual_pred_resd.RData",dir.output))
p1+labs(title = "Gestational age estimation by metabolic clock trained on samples from women not treated by aspirin accurately reflects progression at early- and mid-pregnancy.")
p2+labs(title = "Aspirin-treated samples were estimated to have a younger gestational age then placebo-treated samples, depicted by the left-shift of green peak from blue peak (p=0.001265; t-test).")
p3+labs(title = "Gestational age acceleration is significantly deviated from 0 in aspirin-treated samples (Anova, p=6.7e-05; t-test, Bonferroni-corrected, p= 0.0083).")
# #residual plot
# model<-lm(Predicted~Actual,data=df[df$group!="2nd trimester, aspirin-treated",])
# require(Metrics)
# knitr::kable(data.frame(MAE=mae(df$Actual[df$group!="2nd trimester, aspirin-treated"],df$Predicted[df$group!="2nd trimester, aspirin-treated"]),
# `R-squre`=(summary(model))$adj.r.squared))
# resd=df$Predicted-stats::predict(model,newdata=df)
# names(resd)=df$sampleID
# df$lm.resd=resd
# require(ggpubr)
# knitr::kable(df %>% group_by(group) %>% summarise(mean=mean(lm.resd)))
# stat.mean=compare_means(lm.resd ~ group, data = df,
# ref.group = ".all.", method = "t.test",p.adjust.method = "bonferroni") %>% rstatix::add_significance("p.adj")
# df$group=factor(df$group,levels = c("1st trimester, pre-treatment","2nd trimester, aspirin-treated","2nd trimester, placebo-treated"))
# p = ggboxplot(df, x = "group", y = "resd",col="group",add = "jitter") + #rotate_x_text(angle = 90) +
# stat_compare_means(method = "anova", label.y = 6) + # Add global annova p-value
# stat_pvalue_manual(stat.mean,label = "p.adj.signif",x="group2",y.position = 5)+
# ylab("residual GA (weeks)") + xlab("") + theme(axis.text.x = element_blank(),legend.title = element_blank(), legend.direction = "vertical")
# p3=p
# ggsave(p,filename = sprintf("%s/PREG.ResidualGA.pdf",dir.output),height = 4,width = 4,device = cairo_pdf)
# ggsave(p,filename = sprintf("%s/PREG.ResidualGA.svg",dir.output),height = 4,width = 4,device = svglite)
# save(df,file=sprintf("%s/PREG.actual_pred_resd.RData",dir.output))
p1
p2
# p3
We next examined the specific metabolites that were (1) affected by aspirin treatment, and (2) predictive of clinical response to aspirin treatment; and (3) correlated with gestational age.
rm(list=ls())
dir.output="CombinedEffect"
# invisible(mkdirs(dir.output))
require(R.utils)
require(reshape2)
require(repmis)
require(devtools)
invisible(source_data("https://github.com/BRL-BCM/ASPRE-analysis/raw/main/data/dataSet.RData?raw=True"))
source_url("https://github.com/BRL-BCM/ASPRE-analysis/raw/main/R/func.R")
#---- get full data_mx with outliers excluded ----
outliers=c(sampleAttr$sIDs[sampleAttr$consss_batch!="non-outlier"])
input=list(rmX="Yes",level="zscore",fillThreshold=0.2,DiffThr=0.5,
batchEffectCorrectMethod="none",
tri2only=FALSE,sampleFeature="ASA_tri_chr",
ASA2outliersBoxplot="Include all ASA 2nd trimester samples",measureBoxplot="mean",
ImputeMissingValues=TRUE,selectMets=c("glucose","heme"),
outliers=outliers)
temp=PEASA.getDataMxByInput(input)
metReport=list(data_mx=temp$data_mx,sampleAttr=temp$sampleAttr,input=input)
rm(list=setdiff(ls(),c(lsf.str(),"metReport","compAnno")))
#---- prepare objects: data_mx, InternalExposure VIP of ASA.mets and PREG.mets ----
dir.output="CombinedEffect"
data_mx=metReport$data_mx
sampleAttr=metReport$sampleAttr
require(R.utils)
require(openxlsx)
# load Internal Exposure (IE)
temp=loadToEnv("aspirin.MVA/InternalExposure-20CV.RData")[["df"]]
InternalExposure=temp$Predicted
names(InternalExposure)=temp$sampleID
# load GA residual (acceleration) based on gastiational age clock
temp=loadToEnv(sprintf("PREG.MVA/PREG.actual_pred_resd.RData"))[["df"]]
GA.resd=temp$lm.resd
names(GA.resd)=temp$sampleID
#test model correlation
tmp=temp[sampleAttr$ASA_tri!="TRUE2",]
# cor.test(tmp$Actual,tmp$Predicted)
sampleAttr=sampleAttr[sampleAttr$trimester=="2"&sampleAttr$ASA,]
sample.ind=sampleAttr$sIDs
data_mx=data_mx[,sample.ind]
data_mx=imputeMissingValues(data_mx,data_mx)
InternalExposure=InternalExposure[sample.ind]
GA.resd=GA.resd[sample.ind]
# load molecule property re: ASA in PE 0/1 molucules
# ASA.res.met=read.xlsx("/Volumes/Seagate-2C/BaylorMHG/PE_ASA/1st_052820/UVA/ASA_PE_4group.xlsx",sheet = 1)
ASA.res.met=loadToEnv(sprintf("CombinedEffect/PE.ttest.tri2.RData"))[["df.vol"]]
# ASA.res.met=ASA.res.met[ASA.res.met$P.ASA_PE0.ASA_PE1<0.05,]
# ASA.res.met=ASA.res.met[order(ASA.res.met$P.ASA_PE0.ASA_PE1),]
ASA.res.met=ASA.res.met[order(ASA.res.met$y),]
ASA.res.met.p=ASA.res.met$y
names(ASA.res.met.p)=ASA.res.met$BIOCHEMICAL
# ASA.res.met.meanDiff=read.xlsx("/Volumes/Seagate-2C/BaylorMHG/PE_ASA/1st_052820/UVA/ASA_PE_4group.xlsx",sheet = 4)
ASA.res.met.meanDiff=ASA.res.met$x
# ASA.res.met.meanDiff=ASA.res.met.meanDiff$MeanDiff.ASA_PE1.ASA_PE0[match(ASA.res.met$BIOCHEMICA,ASA.res.met.meanDiff$BIOCHEMICAL)]
names(ASA.res.met.meanDiff)=ASA.res.met$BIOCHEMICAL
ASA.res.met=compAnno$COMP_ID[match(ASA.res.met$BIOCHEMICAL,compAnno$BIOCHEMICAL)]
# load VIPs in aspirin compliance prediction model
TrainSet="tri2"
ASA.met=loadToEnv(sprintf("aspirin.MVA/ASA.oplsda.compliance.train-only.%s.RData",TrainSet))[["VIP.df"]]
ASA.met.VIP=ASA.met$VIP
names(ASA.met.VIP)=ASA.met$BIOCHEMICAL
ASA.met.xloadings=ASA.met$Xloadings
names(ASA.met.xloadings)=ASA.met$BIOCHEMICAL
ASA.met=rownames(ASA.met)
ASA.meanDiff=loadToEnv("aspirin.UVA/FourGroup.ttest.RData")[["df.vol"]]
ASA.meanDiff=ASA.meanDiff[ASA.meanDiff$pair=="PLACEBO_-_2nd_Trimester&ASA_-_2nd_Trimester",]
# load VIPs in gestational age prediction model
PREG.met=loadToEnv("PREG.MVA/PREG.oplsda.ga-w.train.tri1_and_placebo2.RData")[["VIP.df"]]
PREG.met.VIP=PREG.met$VIP
names(PREG.met.VIP)=PREG.met$BIOCHEMICAL
PREG.met.xloadings=PREG.met$Xloadings
names(PREG.met.xloadings)=PREG.met$BIOCHEMICAL
PREG.met=rownames(PREG.met)
PREG.ttest=loadToEnv("aspirin.UVA/FourGroup.ttest.RData")[["df.vol"]]
PREG.ttest=PREG.ttest[PREG.ttest$pair=="PLACEBO_-_1st_Trimester&PLACEBO_-_2nd_Trimester",]
# prep new data_mx with IE, GA residual and pe as variable
require(ggcorrplot)
ASA.met=ASA.met[!ASA.met%in%c("1868")]
dm=data_mx[!rownames(data_mx) %in%c("1868"),sample.ind]
rownames(dm)=compAnno$BIOCHEMICAL[match(rownames(dm),compAnno$COMP_ID)]
dm=rbind(dm,InternalExposure)
dm=rbind(dm,pe=sampleAttr$pe)
dm=rbind(dm,GAresd=GA.resd)
dm=t(dm)
# corr <- cor(dm)
# p.mat <- cor_pmat(dm)
# save(corr,p.mat,file=sprintf("%s/corr_mx.RData",dir.output))
load(sprintf("%s/corr_mx.RData",dir.output))
target=c("InternalExposure","GAresd","pe")[1]
temp=p.mat[,target]
b=temp[temp<0.05]
#---- AIC matrix -> AIC delta.matrix (compare to IE)----
require(MuMIn)
tem=unique(c(colnames(dm),"InternalExposure"))
tem=tem[!tem %in% c("GAresd","pe")]
tem.og=tem
colnames(dm)=gsub("-","_",colnames(dm))
colnames(dm)=gsub("^[0-9]+","e",colnames(dm))
colnames(dm)=gsub("/","M",colnames(dm))
colnames(dm)=gsub(" ","N",colnames(dm))
colnames(dm)=gsub("\\(|\\)|\\:|\\,|\\[|\\]\\;","P",colnames(dm))
colnames(dm)=gsub("[[:punct:]]","P",colnames(dm))
tem=gsub("-","_",tem)
tem=gsub("^[0-9]+","e",tem)
tem=gsub("/","M",tem)
tem=gsub(" ","N",tem)
tem=gsub("\\(|\\)|\\:|\\,|\\[|\\]\\;","P",tem)
tem=gsub("[[:punct:]]","P",tem)
# AUC=list()
# lm.coeffient=list()
# AIC.mx=matrix(nrow = length(tem),ncol=length(tem))
# target=c("InternalExposure","GAresd","pe")[3]
# require(pROC)
# for (i in 1:length(tem)) {
# formula=as.formula(paste0(target, " ~ ",tem[i]))
# model=glm(formula,as.data.frame(dm),family=binomial(link='logit'),)
# # model=glm(formula,as.data.frame(dm),family = gaussian)
# # AIC[[tem[i]]]=AICc(model)
# AIC.mx[i,i]=AICc(model)
# lm.coeffient[[i]]=model$coefficients
# prob=predict(model,type=c("response"))
# AUC[[i]]=pROC::auc(roc(response=dm[,target],predictor = prob))
# j=i+1
# while (j<=length(tem)) {
# formula=as.formula(paste0(target," ~ ",tem[i]," + ",tem[j]))
# model=glm(formula,as.data.frame(dm),family=binomial(link='logit'),)
# # model=glm(formula,as.data.frame(dm),family = gaussian)
# # AIC[[paste0(tem[i],"+",tem[j])]]=AICc(model)
# AIC.mx[i,j]=AICc(model)
# AIC.mx[j,i]=AICc(model)
# j=j+1
# }
# }
# names(AUC)=tem.og
# names(lm.coeffient)=tem.og
# # ogLik(model)
# # AIC=unlist(AIC)
# # AIC=AIC[order(AIC)]
# dimnames(AIC.mx)=list(tem.og,tem.og)
# save(AIC.mx,AUC,lm.coeffient,file=sprintf("%s/AIC.mx.%sVariable.RData",dir.output,(length(tem.og))))
load(sprintf("%s/AIC.mx.%sVariable.RData",dir.output,(length(tem.og))))
AIC.delta=matrix(nrow = length(tem),ncol=length(tem))
for (i in 1:length(tem)) {
for (j in i:length(tem)){
AIC.delta[i,j]=AIC.mx[i,j]-AIC.mx[i,i]
AIC.delta[j,i]=AIC.mx[j,i]-AIC.mx[j,j]
}
}
dimnames(AIC.delta)=list(tem.og,tem.og)
AIC.single.delta=matrix(nrow = length(tem),ncol=length(tem))
for (i in 1:length(tem)) {
for (j in i:length(tem)){
AIC.single.delta[i,j]=AIC.delta[j,i]-AIC.delta[i,j]
}
}
dimnames(AIC.single.delta)=list(tem.og,tem.og)
AIC.single.delta[lower.tri(AIC.single.delta)]=AIC.single.delta[upper.tri(AIC.single.delta)]
temp=apply(AIC.single.delta,2,function(x) length(which(x<0)))
# AIC.single.delta[rownames(AIC.single.delta)%in%names(temp[order(temp)][1:5]),rownames(AIC.single.delta)%in%names(temp[order(temp)][1:5])]
#----plot----
compareto="InternalExposure"
AIC.delta=AIC.delta[!rownames(AIC.delta) %in% c("pe"),!rownames(AIC.delta) %in% c("pe")]
CP2.bits=AIC.delta[compareto,]
CP2.bits=CP2.bits[!names(CP2.bits)%in%c(compareto,"GAresd")]
CP.bits=AIC.delta[,compareto]
CP.bits=CP.bits[!names(CP.bits)%in%c(compareto,"GAresd")]
# (CP.bits-CP2.bits)[order((CP.bits-CP2.bits),decreasing = T)]
AIC.CombinedModel=AIC.mx[names(CP.bits),compareto]
label=names(CP.bits)
logp=ASA.meanDiff[match(names(CP.bits),ASA.meanDiff$BIOCHEMICAL),"y"]
meanDiff=ASA.meanDiff[match(names(CP.bits),ASA.meanDiff$BIOCHEMICAL),"x"]
AICpe=sapply(names(CP.bits),function(x) AIC.mx[x,x])
cor2IE=abs(corr[compareto,names(CP.bits)])
names(AUC)=tem.og
AUCpe=unlist(AUC)[names(CP.bits)]
lm.coeffient=unlist(lm.coeffient)
names(lm.coeffient)=tem.og
AUCpe.dir=AUCpe*ifelse(lm.coeffient[names(CP.bits)]>=0,-1,1)
PREG.VIP.dir=PREG.met.VIP*ifelse(PREG.met.xloadings>=0,1,-1)
ASA.VIP.dir=ASA.met.VIP*ifelse(ASA.met.xloadings>=0,1,-1)
# check distribution
# names(AUCpe)=tem.og[tem.og!=compareto]
# plot(x=logp,y=AICpe)
# plot(x=logp,y=AUCpe)
# plot(x=AICpe,y=AUCpe)
# plot(x=(unlist(lm.coeffient))[names(CP2.bits)],y=AUCpe)
df=data.frame(`Internal Exposure`=CP2.bits,
Compound=CP.bits[names(CP2.bits)],
pair=names(CP2.bits),
Compound.sigcor=ifelse(names(CP2.bits) %in% names(b),"Correlated to Internal Exposure","Not Correlated to Internal Exposure"),
AIC.CombinedModel=AIC.CombinedModel,
label=label,
AIC=AICpe,
AUC=AUCpe,
lm.coeffient=(unlist(lm.coeffient))[names(CP2.bits)],
AUCpe.dir=AUCpe.dir[names(CP2.bits)],
ASAres.p=ASA.res.met.p[names(CP2.bits)],
ASAres.meanDiff=ASA.res.met.meanDiff[names(CP2.bits)],
`ASA.log10(p)`=logp,
ASA.meanDiff=meanDiff,
cor2IE=cor2IE,
PREG.loadings=PREG.met.xloadings[names(CP2.bits)],
PREG.VIP=PREG.met.VIP[names(CP2.bits)],
# PREG.dir.loadings=ifelse(PREG.met.xloadings[names(CP2.bits)]>0,"up","down"),
PREG.VIP.dir=PREG.VIP.dir[names(CP2.bits)],
Compliance.loadings=ASA.met.xloadings[names(CP.bits)],
Compliance.VIP.dir=ASA.VIP.dir[names(CP.bits)],
Compliance.VIP=ASA.met.VIP[names(CP.bits)]
)
# check distribution
# plot(x=df$Compliance.VIP,y=df$Compliance.loadings)
# plot(x=df$PREG.VIP,y=df$PREG.loadings)
# df$pair=as.character(df$pair)
df$label=as.character(df$label)
# plot_ly(x=df$ASAres.meanDiff,y=-log10(df$ASAres.p),mode="markers",text=~label,type = "scatter")
# view ASAres volcano (not shown in html)
df$ASAres.logp=-log10(df$ASAres.p)
df$ASAres.colorbyAUC7=ifelse(df$AUC>0.7,"AUC>0.7","AUC<=0.7")
p= ggplotly(ggplot(df,aes_string(x="AUC",y="ASAres.logp",color="ASAres.colorbyAUC7",label="label"))+theme_bw()+
geom_hline(yintercept = -log10(0.05))+
geom_point()+
theme(legend.title = element_blank()))
# df$label[df$cor2IE<0.5|df$VIP<1]=""
# df$label[df$Compound.sigcor=="Not Correlated to Internal Exposure"|df$VIP<1]=""
# df$label[df$Compound.sigcor=="Not Correlated to Internal Exposure"|df$VIP<1]=""
require(ggrepel)
df$GA.VIP.color=ifelse(abs(df$PREG.VIP.dir)<=1,"Unperturbed",ifelse(df$PREG.VIP.dir>1,"Up-regulated","Down-regulated"))
df$GA.VIP.color=factor(df$GA.VIP.color,levels = c("Up-regulated","Unperturbed","Down-regulated"))
a="Significance in prevention outcome discrimination"
df$ASAXPREG.dir=ifelse(df$Compliance.VIP.dir*df$PREG.VIP.dir<0 & df$PREG.VIP>1,"opposite and >1","same")
p10=ggplot(data=df, aes_string(x="Compliance.VIP.dir", y="ASAres.logp",color="GA.VIP.color",label="label")) + theme_bw() +
# scale_color_gradient(low="blue", high="red") +
# scale_color_gradient2(low="blue", high="red",mid = "grey",midpoint = 0) +
scale_color_manual(values=c("#ff4040", "#999999", "#40a9ff"))+
geom_vline(xintercept = c(-1,1),linetype="dashed") +
geom_hline(yintercept = c(-log10(0.05)),linetype="dashed") +
geom_point(aes_string(size="PREG.VIP"))+
# scale_size_continuous(range = c(0.1, 3)) +
# labs(color="Correlation")+
labs(color="Direction of\ncorrelation with GA",size="Absolute value of\ncorrelation with GA")+
# xlab("VIP in compliance prediction")+
xlab("Variable association (measured by VIP value) with aspirin intake")+
ylab(bquote(`Significance for predicting prevention outcome `(-log[10](p))))+ #"Significance in prevention outcome discrimination (-log10(p),t-test)")
# ylim(-1, 1)+
xlim(-5, 4.5)+
geom_text_repel(direction = "y",
color=ifelse(subset(df,(ASAres.logp>-log10(0.05))&Compliance.VIP.dir>1)$ASAXPREG.dir=="opposite and >1","#505050","#999999"),
size=4.4,
family="Arial Narrow",
data = subset(df,(ASAres.logp>-log10(0.05))&Compliance.VIP.dir>1),
# nudge_x = 2.8 - subset(df, Compound.sigcor=="Correlated to Internal Exposure"&AUC>0.7)$VIP,
nudge_x = 2.8 - subset(df,(ASAres.logp>-log10(0.05))&Compliance.VIP.dir>1)$Compliance.VIP.dir,
hjust = 0)+
geom_text_repel(direction = "y",
size=4.4,
family="Arial Narrow",
color=ifelse(subset(df,(ASAres.logp>-log10(0.05))&Compliance.VIP.dir<(-1))$ASAXPREG.dir=="opposite and >1","#505050","#999999"),
data = subset(df,(ASAres.logp>-log10(0.05))&Compliance.VIP.dir<(-1)),
# nudge_x = 2.8 - subset(df, Compound.sigcor=="Correlated to Internal Exposure"&AUC>0.7)$VIP,
nudge_x = -4 - subset(df,(ASAres.logp>-log10(0.05))&Compliance.VIP.dir<(-1))$Compliance.VIP.dir,
# nudge_y = 3,
hjust = 1)+
# # geom_text(position = position_jitter(width = 0.1,height = 0.02))
theme(panel.grid.minor = element_line(size=0.25),
legend.position = c(0.85,0.85),
legend.background = element_rect(fill = "transparent"),
legend.spacing.y = unit(0, 'cm'),
legend.key.size = unit(0.4, "cm"),
text=element_text(size=18, family="Arial")
# panel.grid.major = element_line(colour = "white",size=0.75)
)
# ggsave(p10, filename = sprintf("%s/3cpwlogp.pdf",dir.output), device = cairo_pdf,width = 8.7, height = 12)
# ggsave(p10, filename = sprintf("%s/3cpwlogp.svg",dir.output), device = svglite,width = 8.7, height = 12)
#12
df$COMP_ID=compAnno$COMP_ID[match(df$label,compAnno$BIOCHEMICAL)]
df$venn=rep("None",nrow(df))
df$venn[df$Compliance.VIP>1]="Aspirin-altered"
df$venn[df$ASAres.p<0.05]="Differential in outcome"
df$venn[df$ASAres.p<0.05&df$Compliance.VIP>1]="Both"
df$venn=factor(df$venn,levels = c("Aspirin-altered","Differential in outcome","Both","None"))
p9=ggplot(
# data=subset(df,Compound.sigcor=="Correlated to Internal Exposure"&AUC>0.7),
# data=subset(df,df$Compliance.VIP>1|df$ASAres.p<0.05),
data=df,
aes_string(x="Internal.Exposure", y="Compound",color="venn",label="pair")) + theme_bw() +
# coord_fixed(ratio=1) +
scale_color_manual(values=c("#ff4040", "#f0d400", "#ff8800","grey"))+
# scale_color_gradient(low="blue", high="red") +
geom_hline(yintercept = 0,color="darkgrey") +
geom_vline(xintercept = 0,color="darkgrey") +
geom_abline(slope=1,intercept = 0,linetype="dashed") +
geom_point()+
# geom_point()+
# geom_abline(slope=1,intercept = -9.34,linetype="dashed") +
# xlim(-5.1,6)+
theme(legend.title = element_blank(),
legend.position = c(0.82,0.15),
legend.key.size = unit(0.3,"cm"),
legend.spacing.y = unit(0.1,"cm"),
text=element_text(size=17, family="Arial"),
legend.background = element_rect(fill=alpha("white",alpha = 0.8)))+
geom_text_repel(
# direction = "y",
color="#505050",
size=5.5,
family="Arial",
# data = subset(df,df$Compliance.VIP>1|df$ASAres.p<0.05),
data = subset(df,Compound>Internal.Exposure),
nudge_y = 0.9,
nudge_x = -6 - subset(df, Compound.sigcor=="Correlated to Internal Exposure"&AUC>0.7)$`Internal.Exposure`,
hjust = 0)+
xlab("Significance of internal exposure (bits)") +
ylab("Significance of compound (bits)")
p9 + labs(title = "Significance of individual metabolite in mediating intervention outcome compared to internal exposure, measured by information loss caused at the removal of the variable. When distributed on the right of line y=x, removal of internal exposure costs higher information loss than removal of the compound.") +theme(text = element_text(size = 10))
# pdf(file="allmetsASAASAres.pdf",height = 6,width = 5)
# p
# dev.off()
# ggsave(p9,filename = sprintf("%s/allmetsASAASAres.pdf",dir.output),height = 4,width = 8,device = cairo_pdf)
# ggsave(p9,filename = sprintf("%s/allmetsASAASAres.svg",dir.output),height = 4,width = 8,device = svglite)
#185
# p=ggplot(data=df, aes_string(x="Internal.Exposure", y="Compound",color="cor2IE",label="pair")) + theme_bw() +
# coord_fixed(ratio=1) +
# scale_color_gradient(low="blue", high="red") +
# # geom_point(color="red")+
# geom_point()+
# geom_hline(yintercept = 0,color="darkgrey") +
# geom_vline(xintercept = 0,color="darkgrey") +
# geom_abline(slope=1,intercept = 0,linetype="dashed") +
# # geom_abline(slope=1,intercept = -9.35,linetype="dashed") +
# xlim(-5.1,8)+
# geom_text_repel(direction = "y",
# # color="#505050",
# data = subset(df,Compound.sigcor=="Correlated to Internal Exposure"&AUC>0.7),
# nudge_x = 3.3 - subset(df, Compound.sigcor=="Correlated to Internal Exposure"&AUC>0.7)$`Internal.Exposure`,
# hjust = 0)+
# xlab("Significance of internal exposure (bits)") +
# ylab("Significance of compound (bits)")+
# labs(color="Correlation")+
# theme(legend.position = c(0.9,0.2))
#
# p
# pdf(file = "VaribaleBits185.pdf",width =10, height=7)
# p
# dev.off()
Over one-third (26/73) of these differential metabolites were altered by aspirin, including aspirin’s derivative metabolites salicylate and 4-hydroxyhippurate. Lipids (8/26) and steroids (6/26) occupied more than half of the list, reinforcing their association with pregnancy outcome and response to aspirin treatment. Figure 4C
p10 + labs(title = "Metabolites under the impact of aspirin, pregnancy and their associations with preeclampsia prevention.") +theme(text = element_text(size = 10))